####### SCRIPT FOR: 
####### Expert SURVEY: EXPECTATIONS FROM PERFORMANCE OF EL. SYSTEMS

rm(list=ls())
gc()

setwd("C:/Users/miros/Desktop/Research/From Duverger to the Seat Product - Seeking a Pattern in Experts Evaluation of Electoral Systems")

#DESCRIPTIVE STATISTICS
library(readxl)
data_ind <- read_excel("expert_survey_data_and_codebook.xlsx")
#View(data_ind)
names(data_ind)

#Recoding "N/A" into missing values
data_ind$C1_s1 <- as.numeric(data_ind$C1_s1)
data_ind$C2_s1 <- as.numeric(data_ind$C2_s1)
data_ind$C3_s1 <- as.numeric(data_ind$C3_s1)
data_ind$C4_s1 <- as.numeric(data_ind$C4_s1)
data_ind$C5_s1 <- as.numeric(data_ind$C5_s1)
data_ind$C6_s1 <- as.numeric(data_ind$C6_s1)

data_ind$C1_Ns <- as.numeric(data_ind$C1_Ns)
data_ind$C2_Ns <- as.numeric(data_ind$C2_Ns)
data_ind$C3_Ns <- as.numeric(data_ind$C3_Ns)
data_ind$C4_Ns_c <- as.numeric(data_ind$C4_Ns_c)
data_ind$C5_Ns <- as.numeric(data_ind$C5_Ns)
data_ind$C6_Ns <- as.numeric(data_ind$C6_Ns)
View(data_ind)

#If "N/A"s don't transform to missing values
#data_ind$C1_s1[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C2_s1[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C3_s1[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C4_s1[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C5_s1[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C6_s1[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C1_Ns[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C2_Ns[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C3_Ns[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C4_Ns_c[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C5_Ns[data_ind$C1_s1 == "N/A"] <- NA
#data_ind$C6_Ns[data_ind$C1_s1 == "N/A"] <- NA

#Selecting respondents who finished the survey
sel <- data.frame(
  data_ind$C1_s1[data_ind$Finished == 1], data_ind$C2_s1[data_ind$Finished == 1], data_ind$C3_s1[data_ind$Finished == 1], 
  data_ind$C4_s1[data_ind$Finished == 1], data_ind$C5_s1[data_ind$Finished == 1], data_ind$C6_s1[data_ind$Finished == 1],
  data_ind$C1_Ns[data_ind$Finished == 1], data_ind$C2_Ns[data_ind$Finished == 1], data_ind$C3_Ns[data_ind$Finished == 1], 
  data_ind$C4_Ns_c[data_ind$Finished == 1], data_ind$C5_Ns[data_ind$Finished == 1], data_ind$C6_Ns[data_ind$Finished == 1]
)
colnames(sel) <- c("s1_c1", "s1_c2", "s1_c3", "s1_c4", "s1_c5", "s1_c6",
                     "Ns_c1", "Ns_c2", "Ns_c3", "Ns_c4", "Ns_c5", "Ns_c6")

#Counting frequencies of answers in the dataset
library(plyr)
counts <- data.frame(c("Too small", "As expected", "Too large", "Hard to say"),
                     count(sel$s1_c1),      count(sel$Ns_c1)[ ,2], 
                     count(sel$s1_c2)[ ,2], count(sel$Ns_c2)[ ,2],
                     count(sel$s1_c3)[ ,2], count(sel$Ns_c3)[ ,2],
                     count(sel$s1_c4)[ ,2], count(sel$Ns_c4)[ ,2],
                     count(sel$s1_c5)[ ,2], count(sel$Ns_c5)[ ,2],
                     count(sel$s1_c6)[ ,2], count(sel$Ns_c6)[ ,2])
colnames(counts) <- c("label_value", "value", 
                      "Largest Party: Case 1 (Luxembourg 1999)", "Fragmentation: Case 1 (Luxembourg 1999)",
                      "Largest Party: Case 2 (Canada 2006)", "Fragmentation: Case 2 (Canada 2006)",
                      "Largest Party: Case 3 (Netherlands 2017)", "Fragmentation: Case 3 (Netherlands 2017)",
                      "Largest Party: Case 4 (Portugal 1991)", "Fragmentation: Case 4 (Portugal 1991)",
                      "Largest Party: Case 5 (United Kingdom 1959)", "Fragmentation: Case 5 (United Kingdom 1959)",
                      "Largest Party: Case 6 (Slovakia 2012)", "Fragmentation: Case 6 (Slovakia 2012)")


percs <- data.frame(counts$"label_value", counts$"value",
  (counts$"Largest Party: Case 1 (Luxembourg 1999)"/sum(counts$"Largest Party: Case 1 (Luxembourg 1999)"))*100, 
  (counts$"Fragmentation: Case 1 (Luxembourg 1999)"/sum(counts$"Fragmentation: Case 1 (Luxembourg 1999)"))*100,
  (counts$"Largest Party: Case 2 (Canada 2006)"/sum(counts$"Largest Party: Case 2 (Canada 2006)"))*100, 
  (counts$"Fragmentation: Case 2 (Canada 2006)"/sum(counts$"Fragmentation: Case 2 (Canada 2006)"))*100,
  (counts$"Largest Party: Case 3 (Netherlands 2017)"/sum(counts$"Largest Party: Case 3 (Netherlands 2017)"))*100, 
  (counts$"Fragmentation: Case 3 (Netherlands 2017)"/sum(counts$"Fragmentation: Case 3 (Netherlands 2017)"))*100,
  (counts$"Largest Party: Case 4 (Portugal 1991)"/sum(counts$"Largest Party: Case 4 (Portugal 1991)"))*100, 
  (counts$"Fragmentation: Case 4 (Portugal 1991)"/sum(counts$"Fragmentation: Case 4 (Portugal 1991)"))*100,
  (counts$"Largest Party: Case 5 (United Kingdom 1959)"/sum(counts$"Largest Party: Case 5 (United Kingdom 1959)"))*100, 
  (counts$"Fragmentation: Case 5 (United Kingdom 1959)"/sum(counts$"Fragmentation: Case 5 (United Kingdom 1959)"))*100,
  (counts$"Largest Party: Case 6 (Slovakia 2012)"/sum(counts$"Largest Party: Case 6 (Slovakia 2012)"))*100, 
  (counts$"Fragmentation: Case 6 (Slovakia 2012)"/sum(counts$"Fragmentation: Case 6 (Slovakia 2012)"))*100
)

colnames(percs) <- c("label_value", "value", 
                      "Largest Party: Case 1 (Luxembourg 1999)", "Fragmentation: Case 1 (Luxembourg 1999)",
                      "Largest Party: Case 2 (Canada 2006)", "Fragmentation: Case 2 (Canada 2006)",
                      "Largest Party: Case 3 (Netherlands 2017)", "Fragmentation: Case 3 (Netherlands 2017)",
                      "Largest Party: Case 4 (Portugal 1991)", "Fragmentation: Case 4 (Portugal 1991)",
                      "Largest Party: Case 5 (United Kingdom 1959)", "Fragmentation: Case 5 (United Kingdom 1959)",
                      "Largest Party: Case 6 (Slovakia 2012)", "Fragmentation: Case 6 (Slovakia 2012)")
                        

#Transforming wide dataset into a long one
library(tidyr)
counts_long <- gather(counts, case, freq, "Largest Party: Case 1 (Luxembourg 1999)":"Fragmentation: Case 6 (Slovakia 2012)")
#setting the order for facet in ggplot
counts_long$case <- factor(counts_long$case, levels = c("Largest Party: Case 1 (Luxembourg 1999)", "Fragmentation: Case 1 (Luxembourg 1999)",
                                                        "Largest Party: Case 2 (Canada 2006)", "Fragmentation: Case 2 (Canada 2006)",
                                                        "Largest Party: Case 3 (Netherlands 2017)", "Fragmentation: Case 3 (Netherlands 2017)",
                                                        "Largest Party: Case 4 (Portugal 1991)", "Fragmentation: Case 4 (Portugal 1991)",
                                                        "Largest Party: Case 5 (United Kingdom 1959)", "Fragmentation: Case 5 (United Kingdom 1959)",
                                                        "Largest Party: Case 6 (Slovakia 2012)", "Fragmentation: Case 6 (Slovakia 2012)"))

percs_long <- gather(percs, case, freq, "Largest Party: Case 1 (Luxembourg 1999)":"Fragmentation: Case 6 (Slovakia 2012)")
#setting the order for facet in ggplot
percs_long$case <- factor(percs_long$case, levels = c("Largest Party: Case 1 (Luxembourg 1999)", "Fragmentation: Case 1 (Luxembourg 1999)",
                                                        "Largest Party: Case 2 (Canada 2006)", "Fragmentation: Case 2 (Canada 2006)",
                                                        "Largest Party: Case 3 (Netherlands 2017)", "Fragmentation: Case 3 (Netherlands 2017)",
                                                        "Largest Party: Case 4 (Portugal 1991)", "Fragmentation: Case 4 (Portugal 1991)",
                                                        "Largest Party: Case 5 (United Kingdom 1959)", "Fragmentation: Case 5 (United Kingdom 1959)",
                                                        "Largest Party: Case 6 (Slovakia 2012)", "Fragmentation: Case 6 (Slovakia 2012)"))

#Figure 1: Barplots
library(ggplot2)
#png(file = "Fig1_bars.png", width = 3500, height = 5400, res = 600)
Fig.1.bars <- ggplot(data=percs_long, aes(x=label_value, y=freq)) + 
  geom_bar(stat="identity") +
  geom_text(aes(label=sprintf("%0.1f", round(freq, digits = 1))), vjust=-0.5, size=3.5) +
  scale_x_discrete(limits=c("Too large", "As expected", "Too small", "Hard to say")) +
  scale_y_continuous(breaks = c(0, 20, 40, 60, 80, 100), labels = c("0%", "20%", "40%", "60%", "80%", "100%"), limits = c(0, 100)) +
  theme(plot.title = element_text(size = 10), 
        axis.title.x = element_blank(), 
        axis.title.y = element_blank(),
        panel.border = element_rect(fill = "transparent"),
        strip.background = element_rect(colour = "black"),
        panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(colour = "gray90"),
        panel.grid.minor.y = element_line(colour = "gray90"),
        panel.background = element_blank()) +
  coord_cartesian(ylim = c(4.5, 95.5)) +
  facet_wrap(~case, nrow = 6)
Fig.1.bars
#dev.off()

#Table 1: Descriptive statistics
library(psych)
describe(sel)
Table.1 <- data.frame(c("s1: Case 1: Luxembourg 1999", "s1: Case 2: Canada 2006", "s1: Case 3: Netherlands 2017", 
                        "s1: Case 4: Portugal 1991", "s1: Case 5: United Kingdom 1959", "s1: Case 6: Slovakia 2012",
                        "Ns: Case 1: Luxembourg 1999", "Ns: Case 2: Canada 2006", "Ns: Case 3: Netherlands 2017", 
                        "Ns: Case 4: Portugal 1991", "Ns: Case 5: United Kingdom 1959", "Ns: Case 6: Slovakia 2012"),
                      describe(sel)$n,
                      describe(sel)$mean,
                      describe(sel)$sd,
                      describe(sel)$min,
                      describe(sel)$max)
colnames(Table.1) <- c("Case", "n", "mean", "std. dev.", "min", "max")
write.csv(Table.1, "tab1_descriptives.csv")



#COMPARISON EXPERTS vs. DEVIATIONS
#Extracting means for Figures 2 and 3
descr <- data.frame(describe(sel))
data_Nss1 <- data.frame(c("Case 1", "Case 2", "Case 3", "Case 4", "Case 5", "Case 6"),
                        c("Luxembourg 1999", "Canada 2006", "Netherlands 2017", "Portugal 1991", "United Kingdom 1959", "Slovakia 2012"),
                        descr$mean[1:6], c(-0.130, -0.084, -0.114, 0.191, 0.113, 0.287),          #adding ds1 values
                        descr$mean[7:12], c(0.145, 0.091, 0.184, -0.215, -0.169, -0.266))         #adding dNs values
colnames(data_Nss1) <- c("case", "label", "s1_exp", "s1_d", "Ns_exp", "Ns_d")
View(data_Nss1)
rm(descr)

#FIGURE 2: LARGEST PARTY SHARE 
#Experts vs. d_s1
m.s1 <- lm(data_Nss1$s1_d ~ data_Nss1$s1_exp)
summary(m.s1)

library(Hmisc)
rcorr(data_Nss1$s1_d, data_Nss1$s1_exp, type="pearson")

library(ggplot2)
library(ggrepel)
#png(file = "Fig2_s1.png", width = 3000, height = 3000, res = 600)
#R2s1 <- signif(summary(m.s1)$r.squared, 3)   #R^2 value for title
Fig.2.s1 <- ggplot(data_Nss1, aes(x=s1_exp, y=s1_d, label=label)) + 
  geom_point(size=1, color="black") +
  scale_x_continuous(breaks = c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1), 
                     limits = c(-1, 1)) +
  scale_y_continuous(breaks = c(-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3), 
                     limits = c(-0.3, 0.3)) +
  labs(#title = bquote("Figure 3: Largest Party Share - Experts vs. Deviation (" ~ italic(R)^2 == .(R2s1) ~ ")"),
    y = expression(paste("Deviation: ",italic(d[s[1]]) == "log[", italic(s[1])/(italic(MS))^"-1/8", "]" )),
    x = "Average of experts") +
  geom_segment(aes(x = 0, xend = 0, y = -0.3, yend = 0.3), colour = "grey70", linetype = "solid") +
  geom_segment(aes(x = -1, xend = 1, y = 0, yend = 0), colour = "grey70", linetype = "solid") +
  geom_segment(aes(x = -1, xend = -1, y = -0.3, yend = 0.3)) +
  geom_segment(aes(x = 1, xend = 1, y = -0.3, yend = 0.3)) +
  geom_segment(aes(x = -1, xend = 1, y = -0.3, yend = -0.3), linetype = "longdash") +
  geom_segment(aes(x = -1, xend = 1, y = 0.3, yend = 0.3), linetype = "longdash") + 
  geom_smooth(method = "lm", se = FALSE, colour = "black", fullrange = TRUE) +
  geom_text_repel(size = 2.75) +
  theme(panel.grid.major = element_line(colour = "gray90"),
        panel.grid.minor = element_line(colour = "gray90"),
        panel.background = element_blank(),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm")) +
  coord_cartesian(xlim =c(-0.91, 0.91), ylim = c(-0.276, 0.276))
Fig.2.s1
#dev.off()

#FIGURE 3: FRAGMENTATION 
#Experts vs. d_Ns
m.Ns <- lm(data_Nss1$Ns_d ~ data_Nss1$Ns_exp)
summary(m.Ns)

library(Hmisc)
rcorr(data_Nss1$Ns_d, data_Nss1$Ns_exp, type="pearson")

library(ggplot2)
library(ggrepel)
#png(file = "Fig3_Ns.png", width = 3000, height = 3000, res = 600)
#R2Ns <- signif(summary(m.Ns)$r.squared, 3)   #R^2 value for title
Fig.3.Ns <- ggplot(data_Nss1, aes(x=Ns_exp, y=Ns_d, label=label)) + 
  geom_point(size=1, color="black") +
  scale_x_continuous(breaks = c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1), 
                     limits = c(-1, 1)) +
  scale_y_continuous(breaks = c(-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3), 
                     limits = c(-0.3, 0.3)) +
  labs(#title = bquote("Figure 2: Party System Fragmentation - Experts vs. Deviation (" ~ italic(R)^2 == .(R2Ns) ~ ")"),
       y = expression(paste("Deviation: ",italic(d[N[s]]) == "log[", italic(N[s])/(italic(MS))^"1/6", "]" )), 
       x = "Average of experts") +
  geom_segment(aes(x = 0, xend = 0, y = -0.3, yend = 0.3), colour = "grey70", linetype = "solid") +
  geom_segment(aes(x = -1, xend = 1, y = 0, yend = 0), colour = "grey70", linetype = "solid") +
  geom_segment(aes(x = -1, xend = -1, y = -0.3, yend = 0.3)) +
  geom_segment(aes(x = 1, xend = 1, y = -0.3, yend = 0.3)) +
  geom_segment(aes(x = -1, xend = 1, y = -0.3, yend = -0.3), linetype = "longdash") +
  geom_segment(aes(x = -1, xend = 1, y = 0.3, yend = 0.3), linetype = "longdash") + 
  geom_smooth(method = "lm", se = FALSE, colour = "black", fullrange = TRUE) +
  geom_text_repel(size = 2.75) +
  theme(panel.grid.major = element_line(colour = "gray90"),
        panel.grid.minor = element_line(colour = "gray90"),
        panel.background = element_blank(),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm")) +
  coord_cartesian(xlim =c(-0.91, 0.91), ylim = c(-0.276, 0.276))
Fig.3.Ns
#dev.off()

#TABLE 2: REGRESSION
library(stargazer)
stargazer(m.s1, m.Ns,
          type="html", out="table2.htm")